package com.curry.apiprovider.crawler.pageProcessor.job;

import com.alibaba.fastjson.JSON;
import com.curry.apiprovider.crawler.Constants;
import com.curry.livehelper.entity.domain.TCityInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.ArrayList;
import java.util.List;


/**
 * WebMagic page processor that extracts the city-code table of 51job (前程无忧).
 *
 * <p>The parsing below expects the text of the page body to be a single
 * assignment of the form {@code name={'code':'cityName','code':'cityName',...}}:
 * everything after the first {@code =} is taken, braces are removed, entries are
 * separated by {@code ,} and each entry is a {@code code:name} pair wrapped in
 * single quotes.
 *
 * <p>The parsed cities are serialized to a JSON string and stored in the page's
 * result fields under {@link Constants#KEY_JOB_CITY_CODE}.
 *
 * <p>The processor keeps no per-page state in instance fields, so one instance
 * does not leak data from one {@link #process(Page)} call into another.
 */
public class CityCodeJobPageProcessor implements PageProcessor {
    private static final Site site = Site.me().setRetryTimes(3).setCharset("GB2312").setSleepTime(1000);
    private static final Logger logger = LoggerFactory.getLogger(CityCodeJobPageProcessor.class);

    /**
     * Parses the city-code assignment found in the page body and publishes it as JSON.
     *
     * <p>If the body text is missing or contains no {@code =}, the page is marked as
     * skipped and nothing is published. Individual entries that do not contain a
     * {@code :} separator are logged and ignored instead of aborting the whole page.
     *
     * @param page the downloaded page to extract city codes from
     */
    @Override
    public void process(Page page) {
        String body = page.getHtml().xpath("//body/text()").get();
        // get() yields null when the XPath matches nothing; treat that like a missing assignment.
        String[] assignment = body == null ? new String[0] : body.split("=");
        if (assignment.length < 2) {
            logger.error("51job city code page has no 'name=value' payload, skipping page");
            page.setSkip(true);
            return;
        }

        String payload = assignment[1].replace("{", "").replace("}", "");

        // Local list: the result belongs to this page only and must not be shared between calls.
        List<TCityInfo> cityInfos = new ArrayList<>();
        for (String entry : payload.split(",")) {
            String[] codeAndName = entry.split(":");
            if (codeAndName.length < 2) {
                // Also covers the single empty entry produced by an empty "{}" payload.
                logger.warn("Skipping malformed 51job city entry: '{}'", entry);
                continue;
            }
            TCityInfo cityInfo = new TCityInfo();
            cityInfo.setChannelId(Constants.CHANNEL_JOB);
            cityInfo.setCityCode(codeAndName[0].replace("'", ""));
            cityInfo.setCityName(codeAndName[1].replace("'", ""));
            cityInfos.add(cityInfo);
        }

        logger.warn("插入前程无忧城市编码成功");
        page.putField(Constants.KEY_JOB_CITY_CODE, JSON.toJSONString(cityInfos));
    }

    /**
     * Returns the crawl settings used for this processor: 3 retries, GB2312 charset
     * and a sleep time of 1000 between requests.
     *
     * @return the shared {@link Site} configuration
     */
    @Override
    public Site getSite() {
        return site;
    }
}
